# coding:utf8

'''
Crawler for the National Enterprise Credit Information Publicity System
(Inner Mongolia).
Maintainer: Xiao Di
'''
from scpy.logger import get_logger

logger = get_logger(__file__)
import pycurl
import urllib
import re
from utils import kill_captcha
import StringIO
import random
from bs4 import BeautifulSoup
import json
import table
import requests


# def curl(url, data='', cookie='', debug=False):  #抓取函数[get,post]
#     UserAgent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
#     s = StringIO.StringIO()
#     c = pycurl.Curl()
#     c.setopt(c.URL, url)
#     c.setopt(c.REFERER, 'http://gsxt.scaic.gov.cn/')
#     c.setopt(pycurl.CONNECTTIMEOUT, 60)
#     c.setopt(pycurl.TIMEOUT, 120)
#     if cookie:
#         c.setopt(c.COOKIEJAR, "cookie_file_name3")
#     c.setopt(c.COOKIEFILE, "cookie_file_name3")
#     c.setopt(pycurl.FOLLOWLOCATION, True)
#     if data:
#         c.setopt(c.POSTFIELDS, urllib.urlencode(data))
#     c.setopt(pycurl.ENCODING, 'gzip')
#     c.setopt(c.HTTPHEADER, ['Host:www.nmgs.gov.cn:7001', 'Upgrade-Insecure-Requests:1',
#                             'User-Agent:Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
#                             'Origin:http://gsxt.scaic.gov.cn'])
#     c.setopt(c.WRITEDATA, s)
#     c.perform()
#     c.close()
#     return s.getvalue()
def curl(url, data=''):
    """
    Fetch ``url`` and return the raw response body.

    :param url: address to request
    :param data: form payload; when truthy the request is a POST carrying
                 this payload, otherwise a plain GET is issued
    :return: response body exactly as delivered by ``requests`` (``.content``)
    """
    # Both verbs share the same 60 second timeout.
    if not data:
        return requests.get(url, timeout=60).content
    return requests.post(url, data=data, timeout=60).content


def req_alter_http(url_tag):
    """
    Download the page referenced by an HTML fragment that embeds a URL.

    The URL is taken as the text starting at ``http`` and running up to the
    last single quote in the fragment.

    :param url_tag: HTML tag text containing a single-quoted http URL
    :return: body of the fetched page, or ``''`` when no URL is found
    """
    found = re.findall('.*(http.*)\'', url_tag)
    if not found:
        return ''
    return curl(url=found[0])


def parse_alter(html):
    """
    Flatten the first ``<table>`` of a change-record detail page into text.

    The second ``<tr>`` of the table supplies the column headers (``<th>``);
    every ``<tr>`` from the third onwards is a data row.  Each data row is
    rendered as ``"header: cell; header: cell"`` and the rendered rows are
    concatenated directly, with no separator between rows.

    :param html: HTML of the detail page
    :return: the flattened text; ``''`` when there is no table or no data rows
    """
    tables = re.findall('<table.*?</table>', html, re.S)
    first_table = tables[0] if tables else ''
    rows = re.findall('<tr.*?>.*?</tr>', first_table, re.S)
    headers = re.findall('<th.*?>(.*?)</th>', rows[1], re.S) if len(rows) > 1 else []

    rendered_rows = []
    for row in rows[2:]:
        cells = re.findall('<td.*?>(.*?)</td>', row, re.S)
        # str.join copes with a row that has no cells (or a table without
        # headers) by yielding '', where reduce() without an initial value
        # raised TypeError on the empty sequence.
        rendered_rows.append(
            "; ".join(head + ": " + cell for head, cell in zip(headers, cells)))

    # Rows are intentionally joined without a delimiter to keep the output
    # format that existing consumers receive.
    return ''.join(rendered_rows)


def verify(name, **args):
    """
    Solve the site captcha, search for ``name`` and fetch its detail page.

    Each attempt downloads a captcha image, has ``kill_captcha`` recognise
    it, submits the code together with the search keyword, and then follows
    the first company link found on the result page.  Any exception during
    an attempt is logged and the whole sequence is retried, up to 30 times.

    Side effect: on success the module-level ``detail_url`` is set to the
    URL of the company detail page (``run`` reads it later).

    :param name: company name / search keyword
    :param args: accepted for interface compatibility; not used here
    :return: HTML of the company detail page, or ``''`` when the result page
             contains no company link
    :raises ValueError: after 30 failed attempts
    """
    global detail_url
    loop_num = 0
    s = requests.Session()
    while 1:
        loop_num += 1
        try:
            # Format with %s: "%d" truncated random.random() (a float in
            # [0, 1)) to 0 every time, so the cache-busting value never varied.
            verify_url = "http://www.nmgs.gov.cn:7001/aiccips//verify.html?random=%s" % random.random()
            verify_image = s.get(verify_url, timeout=60).content
            captcha_code = kill_captcha(verify_image, 'nmg', 'png')
            print(captcha_code)

            data = {"textfield": name, "code": captcha_code}
            url_url = 'http://www.nmgs.gov.cn:7001/aiccips/CheckEntContext/checkCode.html'
            html = s.post(url_url, data=data, timeout=60).content
            html_json = json.loads(html)
            print(html_json)

            # The search itself is posted with the 'textfield' value echoed
            # back by the captcha check, not with the raw keyword.
            textfield = html_json['textfield']
            data = {"textfield": textfield, "code": captcha_code}
            search_url = 'http://www.nmgs.gov.cn:7001/aiccips/CheckEntContext/showInfo.html'
            html = s.post(search_url, data=data, timeout=60).content

            links = re.findall(r'<a href="\.\.(/GSpublicity/GSpublicityList\.html.+?)"', html)
            if not links:
                logger.info('没有对应的公司')
                return ''
            detail_url = 'http://www.nmgs.gov.cn:7001/aiccips' + links[0]

            detail_html = s.get(detail_url, timeout=60).content
            return detail_html

        except Exception as e:
            logger.exception(e)
            if loop_num >= 30:
                logger.info('验证码尝试了30次，退出尝试')
                logger.error('保存word在exception日志:%s' % name)
                raise ValueError
            logger.info('验证码错误，正在识别,错误次数%s' % loop_num)
            continue


def _fill_share_holder_details(share_holders):
    """Add contribution details to every shareholder row, modifying rows in place."""
    for holder in share_holders:
        detail = holder['shareHolderdetail']
        sub_conam = ""
        reg_cap_cur = ""
        if 'returnalert' in detail:
            # Rows whose detail cell contains 'returnalert' are not followed.
            share_url = ""
        else:
            print(detail)
            # NOTE: share_url is the findall() result, i.e. a list (possibly
            # empty); it is stored back on the row unchanged.
            share_url = re.findall(r"window.open\('(.+?)'\)", detail)
            html = curl(share_url[0]) if share_url else ''
            detail_tables = re.findall(r'<table[\s\S]+?</table>', html) if html else ''
            result_list = table.investment_information(detail_tables[0]) if detail_tables else []
            if result_list:
                sub_conam = result_list[0]
                reg_cap_cur = result_list[1]
        holder['shareHolderdetail'] = share_url
        holder['subConam'] = sub_conam
        holder['conDate'] = ""
        holder['fundedRatio'] = ""
        holder['regCapCur'] = reg_cap_cur
        holder['country'] = ""


def _parse_year_report(html):
    """Parse one annual-report page into its sections, starting from empty defaults."""
    report = {"baseInfo": {}, "website": {}, "assetsInfo": {},
              "investorInformations": [], "equityChangeInformations": [],
              "changeRecords": []}
    # (marker text, result key, parser, strip <span> tags before parsing)
    sections = (
        ('统一社会信用代码', "baseInfo", table.report_basic, True),
        ('网站或网店信息', "website", table.report_website, True),
        ('企业资产状况信息', "assetsInfo", table.report_assetsInfo, True),
        ('股东及出资信息', "investorInformations", table.report_investorInformations, False),
        ('股权变更信息', "equityChangeInformations", table.report_equityChangeInformations, True),
        ('修改记录', "changeRecords", table.report_changeRecords, True),
    )
    for report_table in re.findall(r'<table[\s\S]+?</table>', html):
        for marker, key, parser, strip_span in sections:
            if marker not in report_table:
                continue
            if strip_span:
                # The stripped text is carried over to the following checks
                # on the same table, as before.
                report_table = re.sub('<span.+?>', '', report_table)
                report_table = report_table.replace('</span>', '')
            report[key] = parser(report_table)
    return report


def run(detail_html, **args):
    """
    Parse a company detail page, plus the extra pages it links to, into the
    structured result dictionary.

    The function reads ``entNo``/``entType``/``regOrg`` from the page, posts
    them to fetch the annual-report list and three additional info tabs, and
    parses every section with the ``table`` module.  Annual reports are then
    fetched one by one.

    Fixes over the previous version:
      * result lists are initialised up front instead of being probed via
        NameError, so the incidental debug prints of each list are gone;
      * annual-report sections are reset for every year, so a year lacking a
        section no longer inherits the previous year's data, and a report
        page without tables no longer raises NameError;
      * a section that fails to parse is logged rather than silently
        swallowed by a bare ``except``.

    :param detail_html: HTML of the company detail page
    :param args: optional ``type`` (1 selects the extended return value),
                 ``searchkey`` and ``companyUrl``
    :return: the ``alldata`` dict, or the tuple
             ``(result, alldata, companyUrl)`` when ``type == 1``
    :raises IndexError: when the page lacks entNo/entType/regOrg, or when
                        ``type == 1`` and no basic information was parsed
    """
    entNo = re.findall('name="entNo" value="(.+?)"', detail_html)[0]
    entType = re.findall('id="entType" name="entType" value="(.+?)"', detail_html)[0]
    regOrg = re.findall('id="regOrg" name="regOrg" value="(.+?)"', detail_html)[0]
    data = {"entNo": entNo, "entType": entType, "regOrg": regOrg}
    print(data)
    report_html = curl('http://www.nmgs.gov.cn:7001/aiccips/BusinessAnnals/BusinessAnnalsList.html', data)
    html1 = curl('http://www.nmgs.gov.cn:7001/aiccips/GSpublicity/GSpublicityList.html?service=entCheckInfo', data)
    html2 = curl('http://www.nmgs.gov.cn:7001/aiccips/GSpublicity/GSpublicityList.html?service=cipUnuDirInfo', data)
    html3 = curl('http://www.nmgs.gov.cn:7001/aiccips/GSpublicity/GSpublicityList.html?service=cipSpotCheInfo', data)

    # The pages are glued together with '</br>' and split on the same token,
    # so any '</br>' already inside a page also acts as a section boundary.
    detail_html = detail_html + '</br>' + html1 + '</br>' + html2 + '</br>' + html3

    basicList = []
    shareHolderList = []
    personList = []
    alterList = []
    filiationList = []
    liquidationList = []
    abnormalOperation = []
    checkMessage = []

    for section_html in detail_html.split('</br>'):
        word = re.findall(r'<th colspan="\d+?" style="text-align:center;">(.+?)</th>', section_html)
        print(word)

        if '股东信息' in section_html:
            shareHolderList = table.index('股东信息', section_html)
            if shareHolderList:
                _fill_share_holder_details(shareHolderList)

        if not word:
            # Section without a recognisable title row: nothing to dispatch.
            continue
        title = word[0]
        section_name = title.replace(' ', '')
        try:
            if '基本信息' == title.strip():
                basicList = table.index(section_name, section_html)
            elif '主要人员信息' == title:
                personList = table.index(section_name, section_html)
            elif '变更信息' == title:
                alterList = table.index(section_name, section_html)
                if alterList:
                    for a_alter in alterList:
                        for k in ['altAf', 'altBe']:
                            a_alter_content = a_alter.get(k, '')
                            # Long before/after values are published as a
                            # link; fetch and flatten the linked table.
                            if 'http' in a_alter_content:
                                raw_alter_html = req_alter_http(a_alter_content)
                                a_alter[k] = parse_alter(raw_alter_html)
            elif '分支机构信息' == title:
                filiationList = table.index(section_name, section_html)
            elif '清算信息' == title:
                liquidationList = table.index(section_name, section_html)
            elif '经营异常' == title or '经营异常信息' == title:
                abnormalOperation = table.index(section_name, section_html)
            elif '抽查检查信息' == title:
                checkMessage = table.checkMessage(section_name, section_html)
        except Exception as e:
            # Best effort: one broken section must not abort the whole parse.
            logger.exception(e)
            print(word)
            continue

    report_url = re.findall(r'href="(http://www\.nmgs\.gov\.cn:7001/aiccips/BusinessAnnals/view\.html.+?)">(\d+)',
                            report_html)
    yearReportList = []
    yearSource = []
    for year_url, year in report_url:
        html = curl(year_url)
        year_report = _parse_year_report(html)
        year_report["year"] = year
        yearSource.append({"year": year, "html": html})
        yearReportList.append(year_report)

    alldata = {'province': 'nmg', "abnormalOperation": abnormalOperation, "basicList": basicList,
               "shareHolderList": shareHolderList, "personList": personList, "punishBreakList": [],
               "punishedList": [], "alidebtList": [], "entinvItemList": [],
               "frinvList": [], "frPositionList": [], "alterList": alterList,
               "filiationList": filiationList, "caseInfoList": [], "sharesFrostList": [],
               "sharesImpawnList": [], "morDetailList": [], "morguaInfoList": [],
               "liquidationList": liquidationList, "yearReportList": yearReportList, "checkMessage": checkMessage}
    if args.get("type") == 1:
        company_name = basicList[0]['enterpriseName']
        # Prefer the URL handed in by the caller; fall back to the
        # module-level detail_url set by verify()/search3().
        company_page_url = args.get("companyUrl") or detail_url
        result = {"province": "nmg", "type": 1, "html": detail_html, "yearList": yearSource,
                  "keyword": args.get("searchkey", "none"), "companyNme": company_name}
        companyUrl = {"url": company_page_url, "method": "get", "companyName": company_name,
                      "province": "nmg"}
        return (result, alldata, companyUrl)
    return alldata


def search(key):
    """
    Look up a company by keyword and return its parsed data.

    :param key: company name / search keyword
    :return: the dictionary produced by ``run``, or ``{}`` when ``verify``
             yields no detail page
    """
    detail_page = verify(key)
    if not detail_page:
        return {}
    return run(detail_page)


def search2(key):
    """
    Look up a company by keyword and return the extended (type 1) result.

    :param key: company name / search keyword
    :return: the ``(result, alldata, companyUrl)`` tuple produced by ``run``,
             or ``()`` when ``verify`` yields no detail page
    """
    detail_page = verify(key)
    if not detail_page:
        return ()
    return run(detail_page, searchkey=key, type=1)


def search3(data):
    """
    Re-crawl a company from a previously stored detail-page URL.

    Side effect: the module-level ``detail_url`` is overwritten with the
    supplied URL before the page is fetched.

    :param data: dict with the keys ``url`` and ``companyName``
    :return: the ``(result, alldata, companyUrl)`` tuple produced by ``run``
    :raises Exception: when the page body or the company name is empty
    """
    global detail_url
    url = data.get('url')
    detail_url = url
    page = curl(url)
    company = data.get("companyName", "")
    if not (page and company):
        raise Exception("error")
    return run(page, searchkey=company, type=1, companyUrl=url)

# print verify(u'内蒙古鄂尔多斯投资控股集团有限公司')
if __name__ == '__main__':
    # Manual smoke test: re-crawl one company from a stored detail URL and
    # dump the result as indented JSON.
    #
    # Alternatives used during development:
    #   search2('内蒙古鄂尔多斯投资控股集团有限公司')
    #   companyName "巴林右旗太升民用爆破器材有限责任公司" with url
    #   http://www.nmgs.gov.cn:7001/aiccips/GSpublicity/GSpublicityList.html?service=entInfo_V0V6xN70vF6wGnHqW3zGoRxK0Sf1xfu3bLlYOjyVYuZ0+RCu3qaXNpur8jdAYh6H-nfhc965hjeay33d0J1uhtg==
    sample_request = {
        "companyName": "内蒙古鄂尔多斯投资控股集团有限公司",
        "url": "http://www.nmgs.gov.cn:7001/aiccips/GSpublicity/GSpublicityList.html?service=entInfo_UhnfFpKxVeU6XZqi0StPiVCpf0vwx6QAFc72ArOKu4UXfoThY5XZeetf18i7CQoi-2ornev0N7BAooeFExGfh5A==",
        "province": "nmg",
        "method": "get",
    }
    crawl_result = search3(sample_request)
    print(json.dumps(crawl_result, ensure_ascii=False, indent=4))